load("XSTSF_production.RData")
source('functions.R')
datasets
f0_all_ct <- f0_all_pre %>% filter(focus_condition == 'ct' ) %>%
group_by(speaker) %>%
mutate(norm_f0 = scale(log(f0))) %>%
ungroup() %>%
mutate(
time = as.numeric(time),
syllable_no = case_when(
time > 0 & time < 11 ~ 1,
time > 10 & time < 21 ~ 2,
time > 20 & time < 31 ~ 3
),
sync_tone1 = ifelse(sync_tone1 == 'RF', 'LHL', sync_tone1),
sync_tone2 = ifelse(sync_tone2 == 'RF', 'LHL', sync_tone2),
sync_tone3 = ifelse(sync_tone3 == 'RF', 'LHL', sync_tone3))
f0_tri_ct <- f0_all_ct %>%
filter(diortri == 'tri' & sandhi_tone != 'outlier') %>%
mutate(sync_tone23 = paste0(sync_tone2, '_', sync_tone3),
hist_tone23 = paste0(hist_tone2, '_', hist_tone3),
hist_tone23_mapped = gsub("yinping", "Ia",
gsub("yangping", "Ib",
gsub("yinshang", "IIa",
gsub("yangshang", "IIb",
gsub("yinqu", "IIIa",
gsub("yangqu", "IIIb", hist_tone23)))))),
all_tone = paste(sync_tone1, hist_tone23_mapped, sep = "_"),
sandhi_tone = ifelse(sandhi_tone == 'HHL', 'MML',
ifelse(sandhi_tone == 'LHL', 'MHL', sandhi_tone)))
f0_tri_ct_yp <- f0_tri_ct %>% filter(hist_tone1 == 'yinping')
f0_tri_ct_yap <- f0_tri_ct %>% filter(hist_tone1 == 'yangping')
f0_tri_ct_ys <- f0_tri_ct %>% filter(hist_tone1 == 'yinshang')
f0_tri_ct_yas <- f0_tri_ct %>% filter(hist_tone1 == 'yangshang')
functions
distri_prop <- function(df, x, y, z = NULL) {
x <- rlang::enquo(x)
y <- rlang::enquo(y)
z <- rlang::enquo(z)
# Define a custom color palette for each sync_tone level
custom_palette <- c(
"HH" = "#4477AA",
"HL" = "#CC6677",
"LHL" = "#DDCC77",
"LH" = "#117733",
"HHL" = "#4477AA",
"HLM" = "#CC6677",
"HML" = "#117733",
"MMH" = "purple",
"MMM" = "black",
"LLH" = "#4477AA",
"LMH" = "#CC6677"
)
# Calculate counts
df1 <- df %>%
filter(time == 1) %>%
group_by(!!x, !!y, !!z) %>%
count() %>%
ungroup() %>%
group_by(!!x, !!z) %>%
mutate(count = sum(n),
prop = n / count) %>%
ungroup()
# Plot data
p <- ggplot(df1, aes(x = !!x, y = prop, fill = !!y,
label = ifelse(prop > 0.15, paste0(round(prop * 100), '%\n(', n, ')'),
paste0(round(prop * 100), '%')))) +
geom_bar(position = "stack", stat = "identity") +
geom_text(size = 5, family = 'Times New Roman', position = position_stack(vjust = 0.5)) +
scale_fill_manual(values = custom_palette) +
theme_classic() +
ylab('frequency proportion') +
theme(text = element_text(size = 20, family = 'Times New Roman'),
axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(vars(!!z), scales = "free_x") +
scale_y_continuous(labels = scales::percent)
return(p)
}
distri_prop2 <- function(df, x, y) {
x <- rlang::enquo(x)
y <- rlang::enquo(y)
# Group by x, y
df1 <- df %>%
filter(time == 1) %>%
group_by(!!x, !!y) %>%
count() %>%
ungroup() %>%
group_by(!!x) %>%
mutate(count = sum(n),
prop = n / count) %>%
ungroup()
p <- ggplot(df1, aes(x = !!x, y = prop, fill = !!y,
label = ifelse(prop > 0.15, paste0(round(prop * 100), '%\n(', n, ')'),
paste0(round(prop * 100), '%')))) +
geom_bar(position = "stack", stat = "identity") +
geom_text(size = 5, family = 'Times New Roman', position = position_stack(vjust = 0.5)) +
theme_minimal() +
labs(fill = 'sandhi category') +
ylab('frequency proportion') +
theme(text = element_text(size = 20, family = 'Times New Roman'),
axis.text.x = element_text(angle = 45, hjust = 1))+
scale_y_continuous(labels = scales::percent)
return(p)
}
f0_tri_ct_yp %>%
distri_prop(hist_tone1, sync_tone1, syntax)+
xlab("historical tone")+
labs(fill = "synchronic tone")
f0_tri_ct_yp %>%
filter(startsWith(hist_tone2, "yin")) %>%
distri_prop(hist_tone2, sync_tone2, syntax)+
xlab("historical tone")+
labs(fill = "synchronic tone")
f0_tri_ct_yp %>%
filter(startsWith(hist_tone3, "yin")) %>%
distri_prop(hist_tone3, sync_tone3, syntax)+
xlab("historical tone")+
labs(fill = "synchronic tone")
f0_tri_ct_yp %>%
filter(startsWith(hist_tone2, "yang")) %>%
distri_prop(hist_tone2, sync_tone2, syntax)+
xlab("historical tone")+
labs(fill = "synchronic tone")
f0_tri_ct_yp %>%
filter(startsWith(hist_tone3, "yang")) %>%
distri_prop(hist_tone3, sync_tone3, syntax)+
xlab("historical tone")+
labs(fill = "synchronic tone")
Auditory categorisation
unique(f0_tri_ct_yp$sandhi_tone)
## [1] "HLM" "MHL" "HML" "MMH" "MML"
p_cluster(f0_tri_ct_yp, sandhi_tone)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
k-means clustering
# data preparation
f0_tri_ct_yp_kmeans <- f0_tri_ct_yp %>%
select(-diortri, -syllable_no, -focus_no, -f0) %>%
spread(time, norm_f0)
# k-means clustering
cluster_model <- k_means_clustering(f0_tri_ct_yp_kmeans)
kml(cluster_model, nbClusters = 2:10)
## ~ Fast KmL ~
## ***************************************************************************************************S
## 100 ********************************************************************************S
kml::plot(cluster_model, 2, parTraj=parTRAJ(col="clusters"))
kml::plot(cluster_model, 3, parTraj=parTRAJ(col="clusters"))
kml::plot(cluster_model, 4, parTraj=parTRAJ(col="clusters"))
kml::plot(cluster_model, 5, parTraj=parTRAJ(col="clusters"))
plotAllCriterion(cluster_model)
# get cluster results
f0_tri_ct_yp_kmeans <- f0_tri_ct_yp_kmeans %>%
mutate(cluster3 = getClusters(cluster_model, 3),
cluster5 = getClusters(cluster_model, 5))
f0_tri_ct_yp_kmeans <- wide_to_long(f0_tri_ct_yp_kmeans) %>%
mutate(syllable_no = ifelse(time < 11, '1',
ifelse(time < 21, '2', '3'))) %>%
mutate(cluster3_reorder = case_when(cluster3 == 'B' ~ 'A',
cluster3 == 'C' ~ 'B',
cluster3 == 'A' ~ 'C'),
cluster5_reorder = case_when(cluster5 == 'C' ~ 'A',
cluster5 == 'A' ~ 'B',
cluster5 == 'D' ~ 'C',
cluster5 == 'E' ~ 'D',
cluster5 == 'B' ~ 'E'))
k-means cluster visualisation
f0_tri_ct_yp_kmeans <- f0_tri_ct_yp_kmeans %>%
mutate(cluster3_reorder = case_when(cluster3 == 'B' ~ 'A',
cluster3 == 'C' ~ 'B',
cluster3 == 'A' ~ 'C'),
cluster5_reorder = case_when(cluster5 == 'C' ~ 'A',
cluster5 == 'A' ~ 'B',
cluster5 == 'D' ~ 'C',
cluster5 == 'E' ~ 'D',
cluster5 == 'B' ~ 'E'))
p_kmeans3 <- p_cluster(f0_tri_ct_yp_kmeans, cluster3_reorder);p_kmeans3
p_kmeans5 <- p_cluster(f0_tri_ct_yp_kmeans, cluster5_reorder);p_kmeans5
heatmap distribution
heatmap_df <- heatmap_data(f0_tri_ct_yp_kmeans, cluster3_reorder)
p_htmap3 <- compare_cluster(heatmap_df, 'cluster3_reorder'); p_htmap3
heatmap_df <- heatmap_data(f0_tri_ct_yp_kmeans, cluster5_reorder)
p_htmap5 <- compare_cluster(heatmap_df, 'cluster5_reorder'); p_htmap5
H-M-L system validation
# monosyllable
unique(f0_all_ct$diortri) # check diortri value for monosyllables
## [1] NA "di" "tri"
f0_mono_ct <- f0_all_ct %>%
filter(is.na(diortri) == TRUE) %>%
mutate(syllable_no = 1) # select monosyllabic data
unique(f0_mono_ct$token) # check if dataset is correct
## [1] "青" "书" "椒" "苦" "包" "手" "瓜" "樱" "机" "花" "海" "菜" "带" "草" "桃"
## [16] "莓" "水" "房" "牛" "链" "豆" "扁" "黄" "梅" "皮" "袄" "油" "杨" "鞋" "小"
## [31] "新" "车" "店" "门" "树" "路" "老" "布" "绳" "开" "修" "锁" "采" "买" "造"
## [46] "茶" "纸" "饭" "船" "工" "人" "籽" "厂" "毛"
unique(f0_mono_ct$citation_tone) # check tone inventory
## [1] "HH" "HL" "RF" "LH"
unique(f0_mono_ct$syllable_no)
## [1] 1
p_cluster(f0_mono_ct, citation_tone)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p_cluster(f0_mono_ct, citation_tone, 'speaker')
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
# S6's mono tones
f0_mono_s6 <- filter(f0_mono_ct, speaker == 'S6')
p_cluster(f0_mono_s6, citation_tone)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
plotting sandhi patterns
# yinping-initial disyllabic dataset
f0_di_ct_lcmh_h <- f0_all_ct %>%
filter(syntax %in% c('L', 'M') & diortri == 'di' & sync_tone1 %in% c('HH', 'HL')) %>%
mutate(sandhi_tone = case_when(sandhi_tone == 'HLLM' ~ 'HHML',
sandhi_tone == 'HMML' ~ 'HHML',
sandhi_tone == 'LLHL' ~ 'LLRF',
.default = sandhi_tone)) %>%
filter(!ind_no %in% c('S2_1_ct', 'S2_11_ct', 'S2_27_ct',
'S3_5_ct', 'S3_19_ct', 'S5_27_ct')) %>%
filter(is.na(sandhi_tone) == FALSE,
sandhi_tone != 'HHHH')
unique(f0_di_ct_lcmh_h$sandhi_tone) # check sandhi categories
## [1] "HHML" "MHHL" "MMMH"
# plotting
p_cluster_cont(f0_di_ct_lcmh_h, sandhi_tone)+
labs(title = 'Disyllabic sandhi')+
theme(plot.title = element_text(hjust = 0.5, size = 20, face = "bold"))
p_cluster_cont(f0_tri_ct_yp, sandhi_tone)+
scale_color_manual(values = c("#4477AA", "#66CCEE", "#DDCC77", "#CC6677", "#9B72C7"))+
labs(title = 'Trisyllabic sandhi')+
theme(plot.title = element_text(hjust = 0.5, size = 20, face = "bold"))
# check individual contours
# ggplotly(draw_by(f0_di_ct_lcmh_h, 'sandhi_tone'), tooltip = c('text', 'x'))
direct comparions between di & tri sandhi
# direct comparisons of similar sandhi contours
f0_tri_yp_comp <- f0_tri_ct_yp %>%
filter(sandhi_tone %in% c('HLM', 'MHL', 'MMH')) %>%
select(-sync_tone23, -hist_tone23, -hist_tone23_mapped, -all_tone) %>%
mutate(sandhi_category = case_when(sandhi_tone == 'HLM' ~ 'Fall',
sandhi_tone == 'MHL' ~ 'Rise-fall',
.default = 'Rise'),
sandhi_tone = paste0('tri: ', sandhi_tone))
f0_di_yp_comp <- f0_di_ct_lcmh_h %>%
mutate(sandhi_category = case_when(sandhi_tone == 'HHML' ~ 'Fall',
sandhi_tone == 'MHHL' ~ 'Rise-fall',
.default = 'Rise'),
sandhi_tone = paste0('di: ', sandhi_tone))
f0_yp_comp <- rbind(f0_tri_yp_comp, f0_di_yp_comp) %>%
mutate(sandhi_tone = factor(sandhi_tone,
levels = c('di: HHML', 'tri: HLM',
'di: MMMH', 'tri: MMH',
'di: MHHL', 'tri: MHL')))
p_cluster_cont(f0_yp_comp, sandhi_tone, 'sandhi_category')+
scale_color_manual(values = c("#4477AA", "#88D3E9", "#DDCC77", "#AA9944","#CC6677", "#882255"))+
labs(title = 'Comparisons of di- and tri-syllabic sandhi')+
theme(plot.title = element_text(hjust = 0.5, size = 20, face = "bold"))
overall distribution comparison
distri_prop2(f0_yp_comp, diortri, sandhi_category)
trisyllabic sandhi patterns
f0_tri_ct_yp_12mh <- f0_tri_ct_yp %>% filter(syntax == "1+2MH")
f0_tri_ct_yp_21ll <- f0_tri_ct_yp %>% filter(syntax == "2+1LL")
f0_tri_ct_yp_21vl <- f0_tri_ct_yp %>% filter(syntax == "2+1VL")
# overall
f0_tri_ct_yp %>% distri_prop2(syntax, sandhi_tone)
f0_tri_ct_yp %>% distri_prop(hist_tone23_mapped, sandhi_tone, syntax)
f0_tri_ct_yp %>% distri_prop2(speaker, sandhi_tone)
# 1+2MH
f0_tri_ct_yp_12mh %>%
filter(sync_tone1 == 'HH') %>%
distri_prop2(all_tone, sandhi_tone)
f0_tri_ct_yp_12mh %>%
filter(sync_tone1 == 'HL') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
# 2+1 LL
#distri_prop2(f0_tri_ct_yp_21ll, sync_tone1, sandhi_tone)
#distri_prop(f0_tri_ct_yp_21ll, hist_tone3, sandhi_tone, hist_tone2)
f0_tri_ct_yp_21ll %>%
filter(sync_tone1 == 'HH') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
f0_tri_ct_yp_21ll %>%
filter(sync_tone1 == 'HL') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
# 2+1VL
#distri_prop2(f0_tri_ct_yp_21vl, sync_tone1, sandhi_tone)
#distri_prop(f0_tri_ct_yp_21vl, hist_tone3, sandhi_tone, hist_tone2
f0_tri_ct_yp_21vl %>%
filter(sync_tone1 == 'HH') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
f0_tri_ct_yp_21vl %>%
filter(sync_tone1 == 'HL') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
f0_tri_ct_yp_12mh %>% filter(sandhi_tone == 'HML') %>% select(ind_no) %>% distinct()
## # A tibble: 8 × 1
## ind_no
## <chr>
## 1 S1_27_ct
## 2 S1_35_ct
## 3 S3_27_ct
## 4 S3_35_ct
## 5 S4_27_ct
## 6 S4_35_ct
## 7 S8_27_ct
## 8 S8_35_ct
f0_tri_ct_yp_21ll %>% filter(sandhi_tone == 'HML') %>% select(ind_no) %>% distinct()
## # A tibble: 3 × 1
## ind_no
## <chr>
## 1 S8_23_ct
## 2 S8_38_ct
## 3 S8_44_ct
f0_tri_ct_yp_21vl %>% filter(sandhi_tone == 'HML') %>% select(ind_no) %>% distinct()
## # A tibble: 3 × 1
## ind_no
## <chr>
## 1 S1_43_ct
## 2 S4_43_ct
## 3 S8_43_ct
build model
f0_tri_ct_yp <- f0_tri_ct_yp %>%
mutate(sandhi_tone = as.factor(sandhi_tone),
sync_tone1 = as.factor(sync_tone1),
hist_tone2 = as.factor(hist_tone2),
hist_tone3 = as.factor(hist_tone3),
syntax = as.factor(syntax),
speaker = as.factor(speaker))
model <- multinom(sandhi_tone ~ sync_tone1 + hist_tone2 + hist_tone3 + syntax + speaker, data = f0_tri_ct_yp)
summary(model)
model_a1 <- multinom(sandhi_tone ~ sync_tone1 + hist_tone2 + hist_tone3 + speaker*syntax, data = f0_tri_ct_yp)
anova(model_a1, model, test = "Chisq")
model_1 <- multinom(sandhi_tone ~ hist_tone2 + hist_tone3 + syntax + speaker, data = f0_tri_ct_yp)
anova(model_1, model, test = "Chisq")
model_2 <- multinom(sandhi_tone ~ sync_tone1 + hist_tone3 + syntax + speaker, data = f0_tri_ct_yp)
anova(model_2, model, test = "Chisq")
model_3 <- multinom(sandhi_tone ~ sync_tone1 + hist_tone2 + syntax + speaker, data = f0_tri_ct_yp)
anova(model_3, model, test = "Chisq")
model_4 <- multinom(sandhi_tone ~ sync_tone1 + hist_tone2 + hist_tone3 + syntax, data = f0_tri_ct_yp)
anova(model_4, model, test = "Chisq")
model_5 <- multinom(sandhi_tone ~ sync_tone1 + hist_tone2 + hist_tone3 + speaker, data = f0_tri_ct_yp)
anova(model_5, model, test = "Chisq")
unique(f0_tri_ct_ys$sandhi_tone)
## [1] "MHL" "HLM" "HML" "MMH" "MMM"
p_cluster(f0_tri_ct_ys, sandhi_tone)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Warning: Removed 9 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 9 rows containing missing values (`geom_line()`).
monosyllable monosyllabic tones
f0_tri_ct_ys %>%
distri_prop(hist_tone1, sync_tone1, syntax)+
labs(fill = "synchronic tone")
f0_tri_ct_ys %>%
filter(startsWith(hist_tone2, "yin")) %>%
distri_prop(hist_tone2, sync_tone2, syntax)+
labs(fill = "synchronic tone")
f0_tri_ct_ys %>%
filter(startsWith(hist_tone3, "yin")) %>%
distri_prop(hist_tone3, sync_tone3, syntax)+
labs(fill = "synchronic tone")
f0_tri_ct_ys %>%
filter(startsWith(hist_tone2, "yang")) %>%
distri_prop(hist_tone2, sync_tone2, syntax)+
labs(fill = "synchronic tone")
f0_tri_ct_ys %>%
filter(startsWith(hist_tone3, "yang")) %>%
distri_prop(hist_tone3, sync_tone3, syntax)+
labs(fill = "synchronic tone")
trisyllabic sandhi patterns
unique(f0_tri_ct_ys$syntax)
## [1] "1+2MH" "1+2VO" "2+1LL"
f0_tri_ct_ys_12mh <- f0_tri_ct_ys %>% filter(syntax == "1+2MH")
f0_tri_ct_ys_21ll <- f0_tri_ct_ys %>% filter(syntax == "2+1LL")
f0_tri_ct_ys_12vo <- f0_tri_ct_ys %>% filter(syntax == "1+2VO")
# 1+2MH
f0_tri_ct_ys_12mh %>%
filter(sync_tone1 == 'HH') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
f0_tri_ct_ys_12mh %>%
filter(sync_tone1 == 'HL') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
# 2+1 LL
f0_tri_ct_ys_21ll %>%
filter(sync_tone1 == 'HH') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
f0_tri_ct_ys_21ll %>%
filter(sync_tone1 == 'HL') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
# 1+2VO
f0_tri_ct_ys_12vo %>%
filter(sync_tone1 == 'HH') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
f0_tri_ct_ys_12vo %>%
filter(sync_tone1 == 'HL') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
## yangshang-initial
unique(f0_tri_ct_yas$sandhi_tone)
## [1] "MHL" "LLH" "LMH"
p_cluster(f0_tri_ct_yas, sandhi_tone)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Warning: Removed 1 rows containing non-finite values (`stat_summary()`).
monosyllabic tones
f0_tri_ct_yas %>%
distri_prop(hist_tone1, sync_tone1, syntax)+
labs(fill = "synchronic tone")
f0_tri_ct_yas %>%
filter(startsWith(hist_tone2, "yin")) %>%
distri_prop(hist_tone2, sync_tone2, syntax)+
labs(fill = "synchronic tone")
f0_tri_ct_yas %>%
filter(startsWith(hist_tone3, "yin")) %>%
distri_prop(hist_tone3, sync_tone3, syntax)+
labs(fill = "synchronic tone")
f0_tri_ct_yas %>%
filter(startsWith(hist_tone2, "yang")) %>%
distri_prop(hist_tone2, sync_tone2, syntax)+
labs(fill = "synchronic tone")
f0_tri_ct_yas %>%
filter(startsWith(hist_tone3, "yang")) %>%
distri_prop(hist_tone3, sync_tone3, syntax)+
labs(fill = "synchronic tone")
unique(f0_tri_ct_yas$syntax)
## [1] "1+2MH" "1+2VO" "2+1VL"
f0_tri_ct_yas_12mh <- f0_tri_ct_yas %>% filter(syntax == "1+2MH")
f0_tri_ct_yas_21vl <- f0_tri_ct_yas %>% filter(syntax == "2+1VL")
f0_tri_ct_yas_12vo <- f0_tri_ct_yas %>% filter(syntax == "1+2VO")
# 1+2MH
f0_tri_ct_yas_12mh %>%
filter(sync_tone1 == 'LHL') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
# 2+1 VL
f0_tri_ct_yas_21vl %>%
filter(sync_tone1 == 'LHL') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
f0_tri_ct_yas_21vl %>%
filter(sync_tone1 == 'LH') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
# 1+2VO
f0_tri_ct_yas_12vo %>%
filter(sync_tone1 == 'LHL') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)
f0_tri_ct_yas_12vo %>%
filter(sync_tone1 == 'LH') %>%
distri_prop2(hist_tone23_mapped, sandhi_tone)